## Balance test East Germany paper
## R&R JPR

rm(list = ls())
load("~/Dropbox/Current projects/GDR/Spatial diffusion/Data/Modeling data 2.RData")

dim(countydata6)
colnames(countydata6)


## balance test comparing counties with and without WGTV for all covariates included in 
## our analysis; use ITM_Dresden as measure of WGTV access
WGTV <- countydata6$ITM_Dresden
X <- countydata6[,c(12,13,59,44,21,24,45,46,30:31,28,20,19,32:35,39:40,41:43,78,81)]

X[,1] <- log(X[,1])	# logged pop size
colnames(X)[1] <- "log(population_1988)"


glm.out <- glm(WGTV ~ ., data = X, family = binomial("probit"))
summary(glm.out)

glm.out.0 <- glm(WGTV ~ 1, family = binomial("probit"))
summary(glm.out.0)

## LR test
LR.stat <- 2*(logLik(glm.out)[1] - logLik(glm.out.0)[1])
1 - pchisq(LR.stat, df = ncol(X))



## Balance statistics
library(Matching)
res <- matrix(NA,ncol(X),6)
rownames(res) <- colnames(X)
colnames(res) <- c("mean WGTV","mean non-WGTV","standiff","var ratio","t-test","ks-test")

X[,4] <- as.numeric(as.logical(X[,4]))


set.seed(123)
m <- 1
for(m in 1:ncol(X))	{

	temp <- balanceUV(Tr = X[,m][WGTV == TRUE], Co = X[,m][WGTV == FALSE], ks = TRUE,
				nboots = 20000, paired = FALSE)
	res[m,] <- c(temp$mean.Tr, temp$mean.Co, NA, temp$var.ratio,
		 			temp$p.value, temp$ks$ks.boot.pvalue)

	## use average variance, following Imbens and Rubin 2013
	res[m,3] <- (temp$mean.Tr - temp$mean.Co) / 
				sqrt((var(X[,m][WGTV == TRUE]) + var(X[,m][WGTV == FALSE]))/2)

	cat(m,"\n")

						}


library(Hmisc)
latex(round(res,3), file = "")



## change in population size stats
range(countydata6$pop_change)
sum(abs(countydata6$pop_change) > .05)
sum(abs(countydata6$pop_change) > .10)



## look at relationship between '53 and '89 protest counts
plot(countydata6$protests53_cities, countydata6$protestcount)
cor(countydata6$protests53_cities, countydata6$protestcount)

summary(lm(countydata6$protestcount ~	countydata6$protests53_cities + 
										I(countydata6$protests53_cities^2)
										))

plot(countydata6$protests53_cities[countydata6$cities_and_communities > 1], countydata6$protestcount[countydata6$cities_and_communities > 1])

summary(lm(countydata6$protestcount ~	countydata6$protests53_cities,
										subset = countydata6$cities_and_communities > 1))








,
